package com.oracle.violet.first.etl

import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Reads an ad-log parquet dataset, registers it as a temp table and
 * prints the first 100 rows of a projection (session / platform / region
 * columns) to stdout.
 *
 * Usage: the first CLI argument, if present, overrides the default
 * parquet input path.
 */
object Sql4Parquet {
  // Default input location, used when no path is passed on the command line.
  private val DefaultInputPath = "H:\\甲骨文培训\\大数据\\Linux\\大纲\\项目二\\VIOLET"

  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf()
    sparkConf.setAppName("查询parquet文件中的内容")
    // Local mode with all available cores — this is a standalone demo job.
    sparkConf.setMaster("local[*]")
    // Use Kryo serialization instead of default Java serialization (faster, smaller).
    sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")

    val sc = new SparkContext(sparkConf)
    try {
      val sqlContext = new SQLContext(sc)

      // Allow the input path to be supplied as the first program argument;
      // fall back to the original hard-coded location.
      val inputPath = args.headOption.getOrElse(DefaultInputPath)

      val dataFrame = sqlContext.read.parquet(inputPath)
      // NOTE(review): registerTempTable is deprecated in Spark >= 2.0
      // (use createOrReplaceTempView); kept as-is since the SQLContext API
      // here suggests Spark 1.x — confirm the Spark version before migrating.
      dataFrame.registerTempTable("adlog")

      sqlContext.sql("select sessionid, adplatformproviderid, provincename, cityname from adlog")
        .show(100)
    } finally {
      // Always release the SparkContext, even if reading or querying fails.
      sc.stop()
    }
  }
}
